/*********** NOTES HEADER *********** 
* This program adds to the dataset the quarters in which a patient is not observed with a claim.
*  We allow these patients to not progress into health outcomes.
*  This should help precision. This version is for the full control sample.

Datasets used: 
 (1) fullListAnalysis2005-2012
 
Datasets created:
 (1)  MatchedSample_Full

************************************/ 
* ---- Session setup ----------------------------------------------------
* Close any log left open by a previous run, wipe memory, then apply settings.
capture log close
clear all
set more off
set matsize 10000

* ---- Project directories ----------------------------------------------
* Every path hangs off origData. dpath was previously assigned twice with
* the same value; it is assigned once here.
global origData "N:\MedicareClaims-P045601-BE"
global dataIn   "${origData}\Work\hosp_retro\health_out\Data-In\"
global dataOut  "${origData}\Work\hosp_retro\health_out\Data-Out\"
global dataProp "${origData}\Work\hosp_retro\health_out\Data-Out\PropScore\Patients"
global logs     "${origData}\Work\hosp_retro\health_out\Logs\PropScore\Patients"
global dpath    "${origData}\Work\ay_data"
global skapath  "${origData}\Work\ska"

* ---- User-written packages (appended to the ado search path) ----------
foreach pkg in "estout" "outreg2" "reghdfe-master/package" {
	adopath +  "N:/SIL-Common/`pkg'"
}

* The log is written to the current working directory (the logs global is
* defined above but is not used for this file).
log using "./0.BuildDataset.log", replace
 
	* Load the analysis panel produced upstream.
	use "$dataOut/fullListAnalysis2005-2012.dta"

		* Sample restrictions handled in this file:
		*  (1) age          - rows with age below 64 are removed here
		*  (2) quarter      - qtr >= 184 (applied further down, once qtr is rebuilt)
		*  (3) calendar year - rows dated in 2005 are removed here
		* Rows with no merger_seen value are removed as well.
		* NOTE(review): the original restriction list says "age >64", but this
		* condition keeps age 64 and also keeps rows whose age is missing (missing
		* compares as larger than any number). agecat is only defined from 65 up.
		* Confirm the intended cutoff.
		drop if year(quarter) == 2005 | age < 64 | merger_seen == .

		gen nonwhite = 1 - RACE1

		* From here on merger_seen is never ".", so the filters below need no
		* extra guard on it.

		* Pass 1: drop matched sets (id) in which no row has a missing merger quarter.
		capture drop flag
			gen flag = (q_of_merger == .)
			egen summg = max(flag), by(id)
		drop if summg == 0

		* Pass 2: drop matched sets in which some row with merger_seen other than 0
		* has no merger quarter (a treated patient without an associated merger date).
		capture drop flag summg
			gen flag = (q_of_merger == .) & (merger_seen != 0)
			egen summg = max(flag), by(id)
		drop if summg == 1

		* The matched-set identifier is called propScoreId from here on.
		rename id propScoreId

		*Quarterly index and post-merger indicator
			* qtr is rebuilt from the daily date stored in quarter.
			replace qtr = qofd(quarter)
			format %tq qtr

			* postmerger is 1 only when a merger quarter exists and lies strictly
			* before the current quarter; rows without a merger quarter get 0.
			capture drop postmerger
			gen postmerger = (q_of_merger < qtr) & (q_of_merger != .)

			* Keep quarters numbered 184 and later.
			drop if qtr < 184

			capture drop year
			gen year = year(quarter)

			* Years counted from 2005 (nyear is recomputed later in this file
			* relative to 2006).
			gen nyear = year - 2005

		*Age categories: five-year bands from 65, with everything 95 and older pooled
		* p_age is age top-coded at 95. Stata orders missing values above every
		* number, so a bare "age>=95" test is also true when age is missing; the
		* explicit missing() guard keeps a missing age missing instead of turning
		* it into 95 (and then into the oldest band).
		gen p_age = cond(age >= 95 & !missing(age), 95, age)

		* Bands: 1 = 65-69, 2 = 70-74, 3 = 75-79, 4 = 80-84, 5 = 85-89,
		*        6 = 90-94, 7 = 95 and older.
		* inrange() is false for missing p_age and for ages below 65, so those
		* rows are left with agecat missing.
		gen agecat = 1 + floor((p_age - 65)/5) if inrange(p_age, 65, 95)

		*Event time relative to the merger quarter (vm_age)
		* Rows without a merger quarter are coded 0 before clamping, so they share
		* the value used for the merger quarter itself.
		gen vm_age = qtr - q_of_merger
		 replace vm_age = 0 if missing(vm_age)

		* Everything earlier than 16 quarters before the merger is pooled at -17,
		* everything 22 or more quarters after is pooled at 22.
		 replace vm_age = cond(vm_age < -16, -17, cond(vm_age >= 22, 22, vm_age))

		* Shift so that the smallest value present becomes 0.
		quietly summarize vm_age, meanonly
		local vm_mn = -r(min)
		replace vm_age = vm_age + `vm_mn'

		*Sample limitation flags
		* flag1 marks every row; flag2 and flag3 copy the hypertension and
		* diabetes indicators.
		gen flag1 = 1
		gen flag2 = mxhypertension_p_all
		gen flag3 = mxdiabetes_p_all

		*Death
		* death_date is the earliest BENE_DEATH recorded for the beneficiary.
		* Rows dated after it are dropped; rows of beneficiaries with no death
		* date are untouched because the comparison with missing is false.
		capture drop oc_death
		egen death_date = min(BENE_DEATH), by(BENE_ID)
		 format death_date %td
		drop if quarter > death_date
		 sort BENE_ID quarter
		* oc_death is 1 on the last remaining row of a beneficiary who has a death date.
		 by BENE_ID: gen oc_death = death_date != . & _n == _N

		*OUTCOME VARIABLES
		* Remove new-onset indicators left over from an earlier run. capture keeps
		* the do-file going when the dataset has none yet.
		capture drop new*
		 sort BENE_ID quarter
			* For each mx-prefixed variable, take the first two underscore-separated
			* tokens after the prefix (for example mxicd_ami_p_all gives icd_ami) and
			* build a new-prefixed indicator equal to 1 in the quarter where the mx
			* variable moves from 0 to 1. The first row of each beneficiary has no
			* previous row, so it is always 0 there.
			foreach x of varlist mx* {
				if regexm("`x'", "^mx([A-Za-z0-9]+_[A-Za-z0-9]+).*") {
					local var "`=regexs(1)'"
					di "`var'"
					by BENE_ID: gen new`var' = (`x' == 1 & `x'[_n-1] == 0)
				}
			}

		* The earlier statement that renamed mxdead_p to oc_death has been removed.
		* oc_death already exists at this point (built from death_date above), so
		* that rename stopped the do-file with a "variable already defined" error.
		* mxdead_p is left in the data under its own name.
		* NOTE(review): confirm that the date-based oc_death is the intended death
		* outcome and not the claims-based mxdead_p.


		*Heart Disease
		*Keep variables since modeling like a hazard model
		* Each kv_ indicator starts at 1 and is set to 0 on rows where the outcome
		* is already present (oc_ equals 1) but did not start in that quarter
		* (the matching new-onset indicator is 0). kv_death is never changed here.

		gen kv_death     = 1
		gen kv_acuteicd8 = 1
		gen kv_ami       = 1
		gen kv_ihd       = 1

		rename mxicd8_acute_p_all oc_acuteicd8
		rename mxicd_ami_p_all    oc_ami
		rename mxicd_ihd_p_all    oc_ihd

		* Full variable name used (kv_acuteicd8); the shorter kv_acuteicd only
		* worked through variable-name abbreviation.
		replace kv_acuteicd8 = 0 if newicd8_acute == 0 & oc_acuteicd8 == 1
		replace kv_ami       = 0 if newicd_ami    == 0 & oc_ami == 1
		replace kv_ihd       = 0 if newicd_ihd    == 0 & oc_ihd == 1

		*Diabetes
		gen kv_diabcomp1 = 1
		gen kv_diabcomp2 = 1
		gen kv_glaucoma  = 1

		rename mxdiab_compl1_p_all  oc_diabcomp1
		rename mxdiab_compl2_p_all  oc_diabcomp2
		rename mxicd_glaucoma_p_all oc_glaucoma

		replace kv_diabcomp1 = 0 if newdiab_compl1 == 0 & oc_diabcomp1 == 1
		replace kv_diabcomp2 = 0 if newdiab_compl2 == 0 & oc_diabcomp2 == 1
		* The new-onset indicator built from mxicd_glaucoma_p_all is named
		* newicd_glaucoma, in line with newicd_ami and newicd_ihd above. The name
		* newglaucoma_p used before is not produced from that variable.
		* NOTE(review): if a separate mxglaucoma_p variable exists upstream and was
		* the intended source, revert this name.
		replace kv_glaucoma  = 0 if newicd_glaucoma == 0 & oc_glaucoma == 1

 *Weights
		* An earlier version derived a sampling weight from the log odds ratio (lor).
		* That code is kept below for reference and stays disabled:
		*   propensity score from the log odds ratio
		*gen escore= 1/(1+exp(lor))
		*   sampling weight from the propensity score
		*gen samp_weight=1/escore if merger==1
		*replace samp_weight=1/(1-escore) if merger~=1
		* The sample weight is not used: every row gets weight 1.
		 gen samp_weight = 1


*Geography
*Fill blank location codes within beneficiary, then build a 3-digit ZIP code
	sort BENE_ID quarter
		foreach geo of varlist zip_pat fips_state STATE_CODE county BENE_COUNTY {
			* First borrow from the next row, then from the previous row of the
			* same beneficiary; whatever is still blank gets the placeholder.
			 by BENE_ID: replace `geo' = `geo'[_n+1] if missing(`geo')
			 by BENE_ID: replace `geo' = `geo'[_n-1] if missing(`geo')
			 replace `geo' = "000000000" if missing(`geo')
		}

		* First three characters of the patient ZIP, plus a numeric coding of it.
		gen ZIP3 = substr(zip_pat, 1, 3)
		encode ZIP3, gen(zip3dig)

*Quarter index, calendar year, and years counted from 2006
		 replace qtr = qofd(quarter)
		format %tq qtr
		capture drop year
		capture drop nyear
		gen year  = year(quarter)
		gen nyear = year - 2006
 
*mergers
		* Remove indicators left from an earlier run. The two prefixes are dropped
		* in separate statements: a single drop naming both fails as a whole, and
		* removes nothing, when either prefix has no matching variable.
		capture drop VMP*
		capture drop pm*

		* Rows with merger_seen equal to 1 but no provider in any slot get code 0.
		 replace vm_provider1 = 0 if vm_provider1==. & vm_provider2==. & vm_provider3==. & merger_seen==1

		* Provider codes that are pooled into VMP0 instead of getting their own dummy.
		local pooled 1,12,20,22,34,37

		* One dummy per remaining code observed in vm_provider1; it is 1 when the
		* code appears in any of the three provider slots.
		* NOTE(review): codes that occur only in vm_provider2 or vm_provider3 never
		* get their own dummy because the list of codes comes from vm_provider1.
		levelsof vm_provider1 if !inlist(vm_provider1, `pooled'), local(vmp)
		foreach i of local vmp {
			gen VMP`i' = vm_provider1==`i'
			 replace VMP`i' = 1 if vm_provider2==`i'
			 replace VMP`i' = 1 if vm_provider3==`i'
		}

		* VMP0 flags rows where any provider slot holds one of the pooled codes.
		* The pooled list is tested directly against all three slots. Previously
		* only pooled codes that happened to occur in vm_provider1 were checked, so
		* a pooled code present only in slot 2 or 3 was missed. Any VMP0 created by
		* the loop above for code 0 is replaced.
		capture drop VMP0
		gen VMP0 = inlist(vm_provider1, `pooled') | inlist(vm_provider2, `pooled') | inlist(vm_provider3, `pooled')

		* Interact every provider dummy with the post-merger indicator.
		foreach var of varlist VMP* {
			 replace `var' = 0 if `var'==.
			gen pm`var' = postmerger * `var'
		}

		gen vmerger = merger_seen

		*Specialty: give the psps-prefixed variables the pspcl prefix
			foreach old of varlist psps* {
				* Strip every occurrence of "psps" from the name to get the suffix.
				local i : subinstr local old "psps" "", all
				di "`i'"
				rename `old' pspcl`i'
			}
			* pspOther does not carry the psps prefix, so it is renamed on its own.
			rename pspOther pspclOther


*State
*destring STATE_encode, gen(state)
*Note that State rarely varies within propScoreId, so controlling for it isn't that important
		capture drop STATE_encode
		encode fips_state, gen(STATE_encode)

		* state copies the encoded value; the codes listed below are pooled into 0.
		* NOTE(review): these numbers are the codes assigned by encode, which follow
		* the sorted order of the fips_state strings present in the data, so the
		* list presumably needs rechecking whenever the input sample changes.
		gen state = STATE_encode
		 replace state = 0 if ///
			inlist(state, 38, 49, 9, 46, 29, 2, 30, 25, 6, 27, 52, 41, 13, 8, 28, 32, 40, 47, 12) | ///
			inlist(state, 44, 50, 33, 43, 22, 16, 3, 1, 20, 19, 31, 11, 37, 21, 5)

		* Indicator for urban_rural equal to 1.
		gen urban_rural1 = (urban_rural == 1)

		 *Event-time index (leadlag)
		* Quarters relative to the merger quarter, with rows lacking a merger
		* quarter set to 0, values limited to the range -16 to 16, and the result
		* shifted by 16 so it runs from 0 to 32.
		capture drop leadlag
		gen leadlag = qtr - q_of_merger
		 replace leadlag = 0 if missing(leadlag)
		 replace leadlag = 16 + max(-16, min(leadlag, 16))

		tabulate leadlag

		* Single sex code built from the two sex dummies: SEX1 contributes 1,
		* SEX2 contributes 2.
		capture drop sex_pat
		gen sex_pat = SEX1 + 2*SEX2
 
*Cleanup (some chapters don't have chronic conditions or are rarely observed) - ICD chapter 6 is small, so pooling with 17
		* mxicdOther_chronic starts as the chapter 17 indicator and is switched on
		* wherever the chapter 6 indicator is non-zero. The test excludes missing
		* values explicitly: a bare "if variable" condition is also true for
		* missing, which would have set the pooled indicator to 1 on rows where
		* chapter 6 is unknown.
		gen mxicdOther_chronic = mxicd17_ch
		 replace mxicdOther_chronic = 1 if mxicd6_chronic_p != 0 & !missing(mxicd6_chronic_p)

	* Drop the chapter-level indicators that are not used, including the two that were just pooled.
	drop mxicd1_chronic_p  mxicd5_chronic_p mxicd12_chronic_p mxicd15_chronic_p mxicd16_chronic_p mxicd18_chronic_p mxicd6_chronic_p  mxicd17_ch

	save ./MatchedSample_Full.dta, replace

	* Close the log opened at the top of this do-file so it is flushed to disk.
	log close
 
 